---
title: "Code for Thesis"
author: "Ruonan Song"
date: "November 16, 2015"
output: html_document
---

## Import data
```{r}
library(xlsx)
library(plotrix)
ER <- read.xlsx("NEER_Index.xlsx",1)
data <- read.xlsx("CPI.xlsx",1)
twoord.plot(ER$year, ER$REERindex_China, ER$year, ER$em_indus_1, xlab = "Year",
            ylab = "REER Index", rylab = "Employment in Primary Sector(Million)",
            lpch = 16, rpch = 17, rcol = "blue")
twoord.plot(ER$year, ER$REERindex_China, ER$year, ER$em_indus_2, xlab = "Year",
            ylab = "REER Index", rylab = "Employment in Secondary Sector(Million)",
            lpch = 16, rpch = 17, rcol = "blue")
twoord.plot(ER$year, ER$REERindex_China, ER$year, ER$em_indus_3, xlab = "Year",
            ylab = "REER Index", rylab = "Employment in Tertiary Sector(Million)",
            lpch = 16, rpch = 17, rcol = "blue")
fig_province <- data$Province[404:422]
fig_CPI <- data$CPI_China[404:422]
names <- c("Beijing","Tianjin","Hebei","Shanxi","Neimenggu","Liaoning","Jilin",
           "Heilongjiang","Shanghai","Jiangsu","Zhejiang","Anhui","Fujian",
           "Jiangxi","Shandong","Henan","Hubei","Hunan","Guangdong")
barplot(fig_CPI, ylim = c(95,105), names.arg = names)
```


## Construct real effective exchange rate at province level
```{r}
revER <- ER[order(nrow(ER):1),]
n <- 14
NEER <- rep(0, n*31)
for (i in 1:n) {
  NEER[(1 + 31 * (i - 1)):(31 + 31 * (i - 1))] <- rep(revER$NEERindex_China[i], 31)
}
REER <- rep(0, n*31)
CPI <- data$CPI_China[1:(n*31)]
data$REER <- NEER/CPI
summary(data$REER)
```

## Two Steps Methods
```{r}
data$Z <- data$Export / data$Output
data$LI_1 <- data$totalw_1/ data$Output_1
data$LI_2 <- data$totalw_2/ data$Output_2
data$LI_3 <- data$totalw_3/ data$Output_3
```

#-----------------------------------------
#         For the Primary Industry
#-----------------------------------------

## The Naive OLS Model
```{r}
Naive_indus1 <- lm(log(em_1) ~ log(Output_1) + log(Z) + log(LI_1) + log(REER) 
                + log(em1_lag) + R, data = data)
summary(Naive_indus1)
```
## The Two Step Model
## Step 1
```{r}
step1_1_1 <- lm(log(Output_1) ~ log(REER), data = data)
residual1 <- log(data$Output_1)-predict(step1_1_1)
step1_2_1 <- lm(log(Z) ~ log(REER), data = data)
residual2 <- log(data$Z)-predict(step1_2_1)
step1_3_1 <- lm(log(data$LI) ~ log(REER), data = data)
residual3 <- log(data$LI_1)-predict(step1_3_1)
```
## Step 2
```{r}
step2_indus1 <- lm(log(em_1) ~ residual1 + residual2 + residual3 + log(REER) 
                   + log(em1_lag) + R, data = data)
summary(step2_indus1)
pool_indus1 <- plm(log(em_1) ~ residual1 + residual2 + residual3 + log(REER) 
                   + log(em1_lag) + R, data = data, index = c("Province", "Time"), 
                   model = "pooling")
summary(pool_indus1)
library(tseries)
adf.test(data$em_1, k=2) ## test for unit root
library(lmtest)
bptest(step2_indus1, data = data, studentize=F) ## test for heteroskedasticity
coeftest(step2_indus1) 
coeftest(step2_indus1, vcovHC) ## robust estimator
```

## Fixed Effect VS. Random Effect
```{r}
library(plm)
pool_indus1 <-  plm(log(em_1) ~ residual1 + residual2 + residual3 + log(REER) 
            + log(em1_lag), data = data, index = c("Province", "Time"), 
            model = "pooling")
fix_indus1 <-  plm(log(em_1) ~ residual1 + residual2 + residual3 + log(REER) 
            + log(em1_lag) + R, data = data, index = c("Province", "Time"), 
            model = "within")
fixtime_indus1 <-  plm(log(em_1) ~ residual1 + residual2 + residual3 + log(REER) 
            + log(em1_lag) + factor(Time), data = data, index = c("Province", "Time"), 
            model = "within")
random_indus1 <-  plm(log(em_1) ~ residual1 + residual2 + residual3 + log(REER) 
            + log(em1_lag) + R, data = data, index = c("Province", "Time"), 
            model = "random")
summary(pool_indus1)
summary(fix_indus1)
summary(fixtime_indus1)
summary(random_indus1)
plmtest(pool_indus1, type = c("bp")) ## Reject the null hypothesis that random effect is not appropriate
pcdtest(fix_indus1, test = c("lm"))
pcdtest(fix_indus1, test = c("cd")) ## Cross sectional dependence
pbgtest(step2_indus1) ## Serial Correlation
phtest(fix_indus1, random_indus1) ## In favor of random effect model
phtest(fixtime_indus1, fix_indus1) ## Use time-fixed effect

```
## Regression Discontinuity
```{r}
install.packages("devtools", dependencies = TRUE)
library(devtools)
install_github("jgabry/QMSS_package")
library(QMSS)
library(ggplot2)
library(plyr)
table(data$R)
decrease_1 <- rep(0, (n-1)*31)
for (i in 1: 403) {
  if (data$em_1[i] <= data$em_1[i+31]) {
    decrease_1[i] <- 1
  } else {
    decrease_1[i] <- 0
  }
}
regime <- data$R[1:403]
time <- data$Time[1:403]
table(decrease_1, regime)
sub <- cbind(regime, decrease_1, time)
sub <- as.data.frame(sub)
summary(sub$regime)
mean_of_decrease <-  rep(0, 13)
for (i in 1 : 13) {
  d <- decrease_1[(1 + (i-1)*31) : (i*31)]
  mean_of_decrease[i] <- mean(d)
}
year <- c(2013,2012,2011,2010,2009,2008,2007,2006,2005,2004,2003,2002,2001)
byyear <- plot(year, mean_of_decrease, type = "l", col = "red3")
byyear +  abline(v = 2005, lty = 2)
sub$timeY <- ifelse(sub$time > 2005, 0, sub$time - 2005)
sub$timeO <- ifelse(sub$time <= 2005, 0, sub$time - 2005)
sub$intY <- ifelse(sub$time > 2005, 0, 1)
sub$intO <- ifelse(sub$time <= 2005, 0, 1)
lm.decrease1 <- lm(decrease_1 ~ 0 + intY + intO + timeY + timeO, data= sub) ## parametric method
summary(lm.decrease1)
## plot discontinuity
sub$yhat <- predict(lm.decrease1)
ddply(sub, "time", summarize, yhat = mean(yhat), freq = length(time))
no_legend <- theme(legend.position = "none")
g_disc <- ggplot(sub, aes(x = time, y = yhat, group = intY, color = factor(intY))) + no_legend
g_disc + geom_line(size = 1.25) + geom_vline(xintercept = 2005, lty = 2) 
## Non-parametric method
library(rdd)
rd.godoc <- RDestimate(decrease_1 ~ time, data = sub, cutpoint = 2005)
summary(rd.godoc)
RDplot(rd.godoc, col = c("blue","green"), pts = T, xlab = "Year")
```

#-----------------------------------------
#         For the Secondary Industry
#-----------------------------------------
```{r}
Naive_indus2 <- lm(log(em_2) ~ log(Output_2) + log(Z) + log(LI_2) + log(REER) 
                + log(em2_lag) + R, data = data)
summary(Naive_indus2)
```
## The Two Step Model
## Step 1
```{r}
step1_1_2 <- lm(log(Output_2) ~ log(REER), data = data)
residual1_2 <- log(data$Output_2)-predict(step1_1_2)
step1_2_2 <- lm(log(Z) ~ log(REER), data = data)
residual2_2 <- log(data$Z)-predict(step1_2_2)
step1_3_2 <- lm(log(data$LI_2) ~ log(REER), data = data)
residual3_2 <- log(data$LI_2)-predict(step1_3_2)
```
## Step 2
```{r}
step2_indus2 <- lm(log(em_2) ~ residual1_2 + residual2_2 + residual3_2 + log(REER) 
                   + log(em2_lag) + R, data = data)
summary(step2_indus2)
bptest(step2_indus2, data = data, studentize=F) ## test for heteroskedasticity
coeftest(step2_indus2) 
coeftest(step2_indus2, vcovHC) ## robust estimat
```
## Fixed effect VS. Random effect
```{r}
fix_indus2 <-  plm(log(em_2) ~ residual1_2 + residual2_2 + residual3_2 + log(REER) 
            + log(em2_lag) + R, data = data, index = c("Province", "Time"), 
            model = "within")
random_indus2 <-  plm(log(em_2) ~ residual1_2 + residual2_2 + residual3_2 + log(REER) 
            + log(em2_lag) +R, data = data, index = c("Province", "Time"), 
            model = "random")
summary(fix_indus2)
summary(random_indus2)
```

#-----------------------------------------
#         For the Tertiary Industry
#-----------------------------------------
```{r}
Naive_indus3 <- lm(log(em_3) ~ log(Output_3) + log(Z) + log(LI_3) + log(REER) 
                + log(em3_lag) + R, data = data)
summary(Naive_indus3)
```
## The Two Step Model
## Step 1
```{r}
step1_1_3 <- lm(log(Output_3) ~ log(REER), data = data)
residual1_3 <- log(data$Output_3)-predict(step1_1_3)
step1_2_3 <- lm(log(Z) ~ log(REER), data = data)
residual2_3 <- log(data$Z)-predict(step1_2_3)
step1_3_3 <- lm(log(data$LI_3) ~ log(REER), data = data)
residual3_3 <- log(data$LI_3)-predict(step1_3_3)
```
## Step 2
```{r}
step2_indus3 <- lm(log(em_3) ~ residual1_3 + residual2_3 + residual3_3 + log(REER) 
                   + log(em3_lag) + R, data = data)
summary(step2_indus3)
bptest(step2_indus1, data = data, studentize=F) ## test for heteroskedasticity
coeftest(step2_indus3) 
coeftest(step2_indus3, vcovHC) ## robust estimat
```
## Fixed effect VS. Random effect
```{r}
fix_indus3 <-  plm(log(em_3) ~ residual1_3 + residual2_3 + residual3_3 + log(REER) 
            + log(em3_lag) + R, data = data, index = c("Province", "Time"), 
            model = "within")
random_indus3 <-  plm(log(em_3) ~ residual1_3 + residual2_3 + residual3_3 + log(REER) 
            + log(em3_lag) +R, data = data, index = c("Province", "Time"), 
            model = "random")
summary(fix_indus3)
summary(random_indus3)
```



